import numpy as np
import pandas as pd
import scanpy as sc
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
warnings.filterwarnings('ignore')
import logging
mpl_logger = logging.getLogger('matplotlib')
mpl_logger.setLevel(logging.WARNING)
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_versions()
sc.settings.set_figure_params(dpi=130)
wt = sc.read_h5ad('./output/wt.preprocessing.h5ad')
wt.shape
#hvgs = adata_ko.var['highly_variable'].intersection(adata_wt.var['highly_variable'])
sc.pp.highly_variable_genes(wt, min_mean=0.0125, max_mean=3, min_disp=0.5)
sc.pl.highly_variable_genes(wt)
np.sum(wt.var['highly_variable'])
#wt = wt[:, wt.var['highly_variable']]
#sc.pp.regress_out(wt, ['n_counts'])
sc.pp.scale(wt, max_value=10)
sc.tl.pca(wt, svd_solver='arpack')
sc.pl.pca_variance_ratio(wt, log=True)
sc.pp.neighbors(wt)
sc.tl.umap(wt)
wt.obs
sc.tl.louvain(wt, resolution=1.6)
sc.pl.umap(wt, color=['louvain','n_genes','n_counts'])
mean_cellType = np.empty((len(wt.obs['louvain'].unique()), wt.raw.shape[1]),
dtype=float, order='C')
raw_adata = wt.raw.X.toarray()
for i in range(0, len(wt.obs['louvain'].unique())):
#print(adata.obs['phenograph'].unique()[i])
mean_cellType[i,:] = np.mean(raw_adata[wt.obs['louvain'] == wt.obs['louvain'].unique()[i], :], axis = 0)
mean_df = pd.DataFrame(np.corrcoef(mean_cellType), index = wt.obs['louvain'].unique(), columns = wt.obs['louvain'].unique())
import seaborn
ax = seaborn.clustermap(mean_df, cmap="jet").fig.suptitle('louvain')
sc.tl.rank_genes_groups(wt, 'louvain', method='wilcoxon')
sc.pl.rank_genes_groups(wt, n_genes=25, sharey=False)
pd.DataFrame(wt.uns['rank_genes_groups']['names']).head(10)
sc.pl.umap(wt, color=['Mki67', 'Top2a', 'Ube2c'], color_map="jet") # Cycling
sc.pl.umap(wt, color=['Lef1','Itm2a'], color_map="jet") # NKT0
sc.pl.umap(wt, color=['Plac8','Icos'], color_map="jet") # NKT2
sc.pl.umap(wt, color=['Tmem176a','Emb'], color_map="jet") # NKT17
sc.pl.umap(wt, color=['Klrb1c','Il2rb'], color_map="jet") # NKT1
sc.pl.umap(wt, color=['Ifit1','Ifit3','Isg15'], color_map="jet") # NKT1d
sc.pl.umap(wt, color=['Fhl2'], color_map="jet") # NKT2a
sc.pl.umap(wt, color=['louvain'], legend_loc = 'on data', legend_fontsize = 6)
marker_genes = ['Lef1','Itm2a','Mki67','Top2a','Zbtb16','Plac8','Izumo1r','Ifit1','Ifit3','Isg15','Ly6a','Fhl2','Nkg7','Fcer1g','Gzma','Gzmb','Rorc','Serpinb1a','Tmem176a','Tmem176b']
ax = sc.pl.dotplot(wt, marker_genes, groupby='louvain')
new_cluster_names = {
'0':'NKT1c', '1':'NKT1b', '2':'NKT1b', '3':'NKT2b', '4':'NKT2a',
'5':'NKT1b','6':'NKT1a', '7':'NKT1d', '8':'NKT1b', '9':'NKT1b',
'10':'NKT17', '11':'Cycling NKT', '12':'NKT0'}
vect = []
for i in range(0, len(wt.obs['louvain'])):
vect = vect + [new_cluster_names[str(wt.obs['louvain'][i])]]
wt.obs['cell_type'] = vect
# or
#adata.obs['louvain'].cat.categories
#adata.rename_categories('louvain', ['TA', 'EP (early)', 'Stem', 'Goblet', 'EP (stress)', 'Enterocyte', 'Paneth', 'Enteroendocrine', 'Tuft'])
#adata.obs['louvain'].value_counts()
sc.pl.umap(wt, color='cell_type', title='', frameon=False)
wt.obs['cell_type'].cat.categories
wt.obs['cell_type'].cat.reorder_categories(['NKT0','Cycling NKT','NKT17','NKT2a','NKT2b','NKT1a','NKT1b','NKT1c','NKT1d'], inplace = True)
wt.uns['cell_type_colors'] = [ '#A31E22', # NKT0
'#F3766E', # Cycling NKT
'#2da9d2', # NKT17
'#FEC85A', # NKT2a
'#FAE600', # NKT2b
'#50C878', # NKT1a
'#2E8B57', # NKT1b
'#0B6623', # NKT1c
'#4CBB17'] # NKT1d
sc.pl.umap(wt, color='cell_type', title='wt', frameon=False)
wt.obs.groupby(["cell_type"]).apply(len)
wt.shape
sc.tl.rank_genes_groups(wt, 'cell_type', method='wilcoxon', n_genes=200)
pd.DataFrame(wt.uns['rank_genes_groups']['names']).head(20)
cell_type_nb = {}
list_cell_type = wt.obs['cell_type'].unique().tolist()
list_cell_type.sort()
for i in range(0, len(wt.obs['cell_type'].unique().tolist())):
cell_type_nb[list_cell_type[i]] = i
#inverted_cell_type_nb = dict([[v,k] for k,v in cell_type_nb.items()])
#inverted_cell_type_nb
clusters = []
genes = []
logFC = []
score = []
pvals = []
pvals_adj = []
for cl in cell_type_nb.keys():
clusters = clusters + ([cl]*len(wt.uns['rank_genes_groups']['names'][str(cl)]))
genes = genes + wt.uns['rank_genes_groups']['names'][str(cl)].tolist()
logFC = logFC + wt.uns['rank_genes_groups']['logfoldchanges'][str(cl)].tolist()
score = score + wt.uns['rank_genes_groups']['scores'][str(cl)].tolist()
pvals = pvals + wt.uns['rank_genes_groups']['pvals'][str(cl)].tolist()
pvals_adj = pvals_adj + wt.uns['rank_genes_groups']['pvals_adj'][str(cl)].tolist()
markers = pd.DataFrame(data = {'clusters': clusters,
'genes':genes,
'logFC':logFC,
'score':score,
'pvals':pvals,
'pvals_adj':pvals_adj,
})
markers.to_csv(path_or_buf = 'wt.markers.csv', sep = ',', index = False)
marker_genes = ['Lef1','Itm2a','Mki67','Top2a','Zbtb16','Plac8','Izumo1r','Ifit1','Ifit3','Isg15','Ly6a','Fhl2','Nkg7','Fcer1g','Gzma','Gzmb','Rorc','Serpinb1a','Tmem176a','Tmem176b']
ax = sc.pl.dotplot(wt, marker_genes, groupby='cell_type')
marker_genes = ['Blk', 'Rorc', 'Tbx21', 'Tox', 'Zbtb16', 'Gata3', 'Sox4','Lef1']
ax = sc.pl.dotplot(wt, marker_genes, groupby='cell_type', save='fig.1h.pdf')
#sc.pl.matrixplot(wt, marker_genes, groupby='cell_type', dendrogram=True,
# use_raw=False, vmin=-3, vmax=3, cmap='bwr', swap_axes=True, figsize=(5,6))
sc.pl.matrixplot(wt, marker_genes, groupby='cell_type', cmap='bwr')
wt.write('./output/wt.ann.h5ad')
wt.obs['umap_1'] = pd.DataFrame(wt.obsm['X_umap']).iloc[:,0].tolist()
wt.obs['umap_2'] = pd.DataFrame(wt.obsm['X_umap']).iloc[:,1].tolist()
wt.obs.to_csv(path_or_buf = 'output/metadata.wt.tsv', sep = '\t', index = True)
sc.external.exporting.cellbrowser(wt,
'cellbrowser/wt',
'wt',
annot_keys=['cell_type', 'louvain', 'sample',
'n_counts', 'n_genes', 'percent_mito', 'percent_ribo'],
cluster_field='cell_type', nb_marker=30,
skip_matrix=False, do_debug=True)
wt = sc.read_h5ad('./output/wt.ann.h5ad')
cell_bool = []
for x in wt.obs['cell_type']:
cell_bool = cell_bool + [x in ['NKT0','NKT17','NKT2a','NKT2b','NKT1a','NKT1b','NKT1c']]
list_of_cell_names = wt.obs.loc[cell_bool, :].index.tolist()
wt = wt[list_of_cell_names, ]
wt.shape
marker_genes = ['Sdc1', 'Klrb1c', 'Ly6a', 'Izumo1r','Cd24a']
ax = sc.pl.dotplot(wt, marker_genes, groupby='cell_type', save='fig.5a.pdf')
wt = sc.read_h5ad('./output/wt.ann.h5ad')
cell_bool = []
for x in wt.obs['cell_type']:
cell_bool = cell_bool + [x in ['NKT0','NKT17','NKT2a','NKT2b','NKT1a','NKT1b','NKT1c']]
list_of_cell_names = wt.obs.loc[cell_bool, :].index.tolist()
wt = wt[list_of_cell_names, ]
wt.shape
marker_genes = ['S1pr1', 'Klf2', 'Emp3', 'S100a4', 'S100a6', 'Cd69', 'Cxcr3']
ax = sc.pl.dotplot(wt, marker_genes, groupby='cell_type', save='fig.6a.pdf')
mean_cellType = np.empty((len(wt.obs['cell_type'].unique()), wt.raw.shape[1]),
dtype=float, order='C')
raw_adata = wt.raw.X.toarray()
for i in range(0, len(wt.obs['cell_type'].unique())):
#print(adata.obs['phenograph'].unique()[i])
mean_cellType[i,:] = np.mean(raw_adata[wt.obs['cell_type'] == wt.obs['cell_type'].unique()[i], :], axis = 0)
mean_df = pd.DataFrame(np.corrcoef(mean_cellType), index = wt.obs['cell_type'].unique(), columns = wt.obs['cell_type'].unique())
ax = sns.clustermap(mean_df, cmap="bwr")
ax.savefig("figure.6c.pdf")
wt = sc.read_h5ad('./output/wt.ann.h5ad')
cell_bool = []
for x in wt.obs['cell_type']:
cell_bool = cell_bool + [x in ['NKT2a','NKT2b']]
list_of_cell_names = wt.obs.loc[cell_bool, :].index.tolist()
wt = wt[list_of_cell_names, ]
wt.shape
sc.tl.rank_genes_groups(wt, groupby='cell_type', key_added='group_DE_results', n_genes=200)
cell_type_nb = {}
list_cell_type = wt.obs['cell_type'].unique().tolist()
list_cell_type.sort()
for i in range(0, len(wt.obs['cell_type'].unique().tolist())):
cell_type_nb[list_cell_type[i]] = i
#inverted_cell_type_nb = dict([[v,k] for k,v in cell_type_nb.items()])
#inverted_cell_type_nb
clusters = []
genes = []
logFC = []
score = []
pvals = []
pvals_adj = []
for cl in cell_type_nb.keys():
clusters = clusters + ([cl]*len(wt.uns['group_DE_results']['names'][str(cl)]))
genes = genes + wt.uns['group_DE_results']['names'][str(cl)].tolist()
logFC = logFC + wt.uns['group_DE_results']['logfoldchanges'][str(cl)].tolist()
score = score + wt.uns['group_DE_results']['scores'][str(cl)].tolist()
pvals = pvals + wt.uns['group_DE_results']['pvals'][str(cl)].tolist()
pvals_adj = pvals_adj + wt.uns['group_DE_results']['pvals_adj'][str(cl)].tolist()
markers = pd.DataFrame(data = {'clusters': clusters,
'genes':genes,
'logFC':logFC,
'score':score,
'pvals':pvals,
'pvals_adj':pvals_adj,
})
markers.to_csv(path_or_buf = 'nkt2.markers.csv', sep = ',', index = False)
sc.pl.rank_genes_groups_violin(wt, use_raw=True, key='group_DE_results', groups=["NKT2a"], gene_names=wt.uns['group_DE_results']['names']["NKT2a"][1:10])
sc.pl.rank_genes_groups_violin(wt, use_raw=True, key='group_DE_results', groups=["NKT2a"], gene_names=wt.uns['group_DE_results']['names']["NKT2a"][90:100])
sc.pl.rank_genes_groups_violin(wt, use_raw=True, key='group_DE_results', groups=["NKT2b"], gene_names=wt.uns['group_DE_results']['names']["NKT2b"][1:10])
sc.pl.rank_genes_groups_violin(wt, use_raw=True, key='group_DE_results', groups=["NKT2b"], gene_names=wt.uns['group_DE_results']['names']["NKT2b"][90:100])
nkt2_genes = ['Tesc','Lgals1','S100a6','Vim','Thy1','Crip1','Lmo4','Pdlim1','Rgcc','Txn1','Stmn1','Emp3','Arl4c','Cxcr6','Igfbp4','Rora','Lgals3','H2afz','Cfl1','Cox6c','Ccr9','Dbi','Prdx1','Hmgn2','Dut','Ldha','Emb','Gpx1','Ppia','Izumo1r','Tpt1','Hcst','Ctsw','Malat1','Ctsd','Btg1','Nkg7','Klrb1c','Cmtm7','Maf','S100a11','H2-D1','H2-K1','H2-Q7','Eef1a1','AW112010','Ypel3','Cd53','Gimap4','Smpdl3a','Fxyd5','Serinc3','Srgn','Gimap5','Ptprcap','Cxcr3','Rsrp1','Pfdn5']
sc.pl.matrixplot(wt, nkt2_genes, groupby='cell_type', save='figure.1sup.pdf')
ax = sc.pl.dotplot(wt, nkt2_genes, groupby='cell_type', save='figure.1sup-2.pdf')
result = wt.uns['group_DE_results']
groups = result['names'].dtype.names
pd.DataFrame(
{group + '_' + key[:1]: result[key][group]
for group in groups for key in ['names', 'logfoldchanges', 'scores', 'pvals']}).head(5)
sc.pl.umap(wt, color=['Rora','Cxcr6','Izumo1r',"Nkg7"], color_map="jet") # NKT1
wt = sc.read_h5ad('./output/wt.ann.h5ad')
cell_bool = []
for x in wt.obs['cell_type']:
cell_bool = cell_bool + [x in ['NKT0','NKT17','NKT2a','NKT2b','NKT1a','NKT1b','NKT1c']]
list_of_cell_names = wt.obs.loc[cell_bool, :].index.tolist()
wt = wt[list_of_cell_names, ]
wt.shape
mean_cellType = np.empty((len(wt.obs['cell_type'].unique()), wt.raw.shape[1]),
dtype=float, order='C')
raw_adata = wt.raw.X.toarray()
for i in range(0, len(wt.obs['cell_type'].unique())):
#print(adata.obs['phenograph'].unique()[i])
mean_cellType[i,:] = np.mean(raw_adata[wt.obs['cell_type'] == wt.obs['cell_type'].unique()[i], :], axis = 0)
mean_df = pd.DataFrame(np.corrcoef(mean_cellType), index = wt.obs['cell_type'].unique(), columns = wt.obs['cell_type'].unique())
import seaborn
seaborn.palplot(sns.color_palette("Blues"))
ax = seaborn.clustermap(mean_df, cmap="Blues")
ax.savefig("figure.2a.pdf")